{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([0.13223698, 0.39830518, 0.93960761, 0.3007807 , 0.59217994,\n",
       "        0.92562934, 0.92710191, 0.01909585, 0.20277616, 0.29105418]),\n",
       " [[1], [1], [1], [1], [1], [1], [1], [1], [1], [1]])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "#每个老虎机的中奖概率,0-1之间的均匀分布\n",
    "probs = np.random.uniform(size=10)\n",
    "\n",
    "#记录每个老虎机的返回值\n",
    "rewards = [[1] for _ in range(10)]\n",
    "\n",
    "probs, rewards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "当数字小的时候,beta分布的概率有很大的随机性\n",
      "0.9566924357894874\n",
      "0.796533273269566\n",
      "0.14083572337004413\n",
      "0.3350811260642629\n",
      "0.5601835883123273\n",
      "当数字大时,beta分布逐渐稳定\n",
      "0.4980336738406946\n",
      "0.5014911804072641\n",
      "0.49954932416995235\n",
      "0.49752638673683025\n",
      "0.5003858155869424\n"
     ]
    }
   ],
   "source": [
    "#beta分布测试\n",
    "print('当数字小的时候,beta分布的概率有很大的随机性')\n",
    "for _ in range(5):\n",
    "    print(np.random.beta(1, 1))\n",
    "\n",
    "print('当数字大时,beta分布逐渐稳定')\n",
    "for _ in range(5):\n",
    "    print(np.random.beta(1e5, 1e5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import random\n",
    "\n",
    "\n",
    "def choose_one():\n",
    "    #求出每个老虎机出1的次数+1\n",
    "    count_1 = [sum(i) + 1 for i in rewards]\n",
    "\n",
    "    #求出每个老虎机出0的次数+1\n",
    "    count_0 = [sum(1 - np.array(i)) + 1 for i in rewards]\n",
    "\n",
    "    #按照beta分布计算奖励分布,这可以认为是每一台老虎机中奖的概率\n",
    "    beta = np.random.beta(count_1, count_0)\n",
    "\n",
    "    return beta.argmax()\n",
    "\n",
    "\n",
    "choose_one()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[1], [1], [1], [1], [1], [1], [1], [1], [1], [1, 0]]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def try_and_play():\n",
    "    i = choose_one()\n",
    "\n",
    "    #玩老虎机,得到结果\n",
    "    reward = 0\n",
    "    if random.random() < probs[i]:\n",
    "        reward = 1\n",
    "\n",
    "    #记录玩的结果\n",
    "    rewards[i].append(reward)\n",
    "\n",
    "\n",
    "try_and_play()\n",
    "\n",
    "rewards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 312
    },
    "executionInfo": {
     "elapsed": 676,
     "status": "ok",
     "timestamp": 1649954384006,
     "user": {
      "displayName": "Sam Lu",
      "userId": "15789059763790170725"
     },
     "user_tz": -480
    },
    "id": "wIHh_wRA8YDz",
    "outputId": "d5d65ff2-744d-44e2-ec8a-eb78d13397c2",
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4698.038052814513, 4680)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_result():\n",
    "    #玩N次\n",
    "    for _ in range(5000):\n",
    "        try_and_play()\n",
    "\n",
    "    #期望的最好结果\n",
    "    target = probs.max() * 5000\n",
    "\n",
    "    #实际玩出的结果\n",
    "    result = sum([sum(i) for i in rewards])\n",
    "\n",
    "    return target, result\n",
    "\n",
    "\n",
    "get_result()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第2章-多臂老虎机问题.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
